{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "zmpDFA3bGEHY"
   },
   "source": [
    "Minute creator in Gradio from day 5 of week 3.\n",
    "A couple of points to note:\n",
    "\n",
    "\n",
    "*   My access to llama hasn't been approved on Hugging Face and so I've experimented with some of the other models.\n",
    "*   There is a fair bit of debugging code in the main function as I was getting an error and couldn't find it.  I've left it in just in case its useful for others trying to debug their code.\n",
    "*   I was debugging with the help of Claude.  It suggested using <with torch.no_grad()> for the minute output.  The rationale is that it disables gradient computation which isn't necessary for inference and I found it did speed things up.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "l-5xKLFeJUGz"
   },
   "outputs": [],
   "source": [
    "!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai httpx==0.27.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Wi-bBD9VdBMo"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import requests\n",
    "from openai import OpenAI\n",
    "from IPython.display import Markdown, display, update_display\n",
    "from google.colab import drive\n",
    "from huggingface_hub import login\n",
    "from google.colab import userdata\n",
    "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
    "import torch\n",
    "import gradio as gr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-0O-kuWtdk4I"
   },
   "outputs": [],
   "source": [
    "# keys\n",
    "\n",
    "#openai\n",
    "openai_api_key = userdata.get('OPENAI_API_KEY')\n",
    "openai = OpenAI(api_key=openai_api_key)\n",
    "\n",
    "#hf\n",
    "hf_token = userdata.get('HF_TOKEN')\n",
    "login(hf_token, add_to_git_credential=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "u6v3Ecileg1H"
   },
   "outputs": [],
   "source": [
    "# constants\n",
    "\n",
    "AUDIO_MODEL = 'gpt-4o-transcribe'\n",
    "OPENAI_MODEL = 'gpt-4o-mini'\n",
    "QWEN2_MODEL = 'Qwen/Qwen2.5-7B-Instruct' # runs slowly no matter what size gpu - kept crashing on ram!\n",
    "GEMMA2_MODEL = \"google/gemma-2-2b-it\" # doesn't use a system prompt\n",
    "PHI3 = \"microsoft/Phi-3-mini-4k-instruct\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "3nSfA_KhfY38"
   },
   "outputs": [],
   "source": [
    "# convert audio to text\n",
    "\n",
    "def transcribe_audio(audio_file_path):\n",
    "  try:\n",
    "    with open (audio_file_path, 'rb') as audio_file:\n",
    "      transcript = openai.audio.transcriptions.create(model = AUDIO_MODEL, file = audio_file, response_format=\"text\")\n",
    "    return transcript\n",
    "  except Exception as e:\n",
    "    return f\"An error occurred: {str(e)}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "OVmlY3DGgnYc"
   },
   "outputs": [],
   "source": [
    "# use transcript to create minutes\n",
    "# use open source model\n",
    "\n",
    "def create_minutes(transcript):\n",
    "\n",
    "  # first try is for debugging\n",
    "  try:\n",
    "    print(f\"Starting to create minutes with transcript length: {len(str(transcript))}\")\n",
    "\n",
    "    if not transcript or len(str(transcript).strip()) == 0:\n",
    "      return \"Error: Empty or invalid transcript\"\n",
    "\n",
    "    #messages\n",
    "    system_prompt = \"You are an expert creator of meeting minutes.  Based on a meeting transcript you can summarise the meeting title and date, attendees, key discussion points, key outcomes, actions and owners and next steps.  Respond in Markdown.\"\n",
    "    user_prompt = f\"Create meeting minutes from the transcript provided.  The minutes should be clear but succint and should include title and date, attendees, key discussion points, key outcomes, actions and owners, and next steps. {transcript}\"\n",
    "\n",
    "    messages = [\n",
    "      {\"role\":\"system\",\"content\":system_prompt},\n",
    "      {\"role\":\"user\",\"content\":user_prompt}\n",
    "    ]\n",
    "    print(\"Messages prepared successfully\") # for debugging\n",
    "\n",
    "    # quantisation (for os model)\n",
    "\n",
    "    quantization_config = BitsAndBytesConfig(\n",
    "      load_in_4bit=True,\n",
    "      bnb_4bit_use_double_quant=True,\n",
    "      bnb_4bit_quant_type=\"nf4\",\n",
    "      bnb_4bit_compute_dtype=torch.bfloat16\n",
    "    )\n",
    "\n",
    "  except Exception as e:\n",
    "    return f\"An error occurred in setup: {str(e)}\"\n",
    "\n",
    "  # model & tokeniser\n",
    "  try:\n",
    "    print(\"Loading tokeniser....\")   # for debugging\n",
    "    tokenizer = AutoTokenizer.from_pretrained(PHI3)\n",
    "    tokenizer.pad_token = tokenizer.eos_token\n",
    "\n",
    "    print(\"Loading model.....\")  # for debugging\n",
    "    model = AutoModelForCausalLM.from_pretrained(PHI3, device_map='auto', quantization_config=quantization_config)\n",
    "    print(f\"Model loaded on device {model.device}\") # for debugging\n",
    "\n",
    "  # chat template\n",
    "    inputs = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n",
    "    model_inputs = tokenizer(inputs, return_tensors=\"pt\").to(model.device)\n",
    "\n",
    "  # torch.no_grad suggested by claude.  This disables gradient computation which reduces memory usage and speeds things up\n",
    "    print(\"Generating text....\") # for debugging\n",
    "    with torch.no_grad():\n",
    "      outputs = model.generate(**model_inputs, max_new_tokens=2000, do_sample=True, temperature=0.7)\n",
    "    print(f\"Generation complete. Output shape: {outputs.shape}\") # for debugging\n",
    "\n",
    "  #***debugging****\n",
    "\n",
    "    # Decode the generated text (excluding the input prompt)\n",
    "    print(\"Starting text decoding...\") # debugging\n",
    "    input_length = len(model_inputs['input_ids'][0]) # debugging\n",
    "    print(f\"Input length: {input_length}, Output length: {len(outputs[0])}\") # debugging\n",
    "\n",
    "    if len(outputs[0]) <= input_length: # debugging\n",
    "        return \"Error: Model didn't generate any new tokens. Try reducing input length or increasing max_new_tokens.\" # debugging\n",
    "\n",
    "    generated_tokens = outputs[0][input_length:] # debugging\n",
    "    print(f\"Generated tokens length: {len(generated_tokens)}\") # debugging\n",
    "\n",
    "  # decode generated text\n",
    "    generated_text = tokenizer.decode(outputs[0][len(model_inputs['input_ids'][0]):],skip_special_tokens=True)\n",
    "    print(f\"Decoded text length: {len(generated_text)}\")\n",
    "\n",
    "    return generated_text.strip()\n",
    "\n",
    "  except ImportError as e:\n",
    "      return f\"Import error - missing library: {str(e)}. Please install required packages.\"\n",
    "  except torch.cuda.OutOfMemoryError as e:\n",
    "      return f\"CUDA out of memory: {str(e)}. Try reducing max_new_tokens to 500 or use CPU.\"\n",
    "  except RuntimeError as e:\n",
    "      return f\"Runtime error: {str(e)}. This might be a CUDA/device issue.\"\n",
    "  except Exception as e:\n",
    "      return f\"Unexpected error during text generation: {type(e).__name__}: {str(e)}\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "c63zzoDopw6u"
   },
   "outputs": [],
   "source": [
    "# create process for gradio\n",
    "\n",
    "def gr_process(audio_file, progress = gr.Progress()):\n",
    "\n",
    "  if audio_file is None:\n",
    "    return \"Please provide an audio file\"\n",
    "\n",
    "  try:\n",
    "    progress(0, desc=\"Analysing file\")\n",
    "    transcript = transcribe_audio(audio_file)\n",
    "\n",
    "    if transcript.startswith(\"An error occurred\"):\n",
    "      return transcript\n",
    "\n",
    "    progress(0.5, desc=\"File analysed, generating minutes\")\n",
    "\n",
    "    minutes = create_minutes(transcript)\n",
    "    progress(0.9, desc=\"Nearly there\")\n",
    "\n",
    "    return minutes\n",
    "\n",
    "  except Exception as e:\n",
    "    return f\"An error occurred: {str(e)}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "82fyQELQkGty"
   },
   "outputs": [],
   "source": [
    "# gradio interface\n",
    "\n",
    "demo = gr.Interface(\n",
    "    fn=gr_process,\n",
    "    inputs= gr.Audio(type=\"filepath\",label=\"Upload MP3 file\"),\n",
    "    outputs= gr.Markdown(label=\"Meeting minutes\"),\n",
    "    title = \"Meeting minute creator\",\n",
    "    description = \"Upload an mp3 audio file for a meeting and I will provide the minutes!\"\n",
    ")\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "  demo.launch(debug=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "XljpyS7Nvxkh"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "T4",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
